{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7343d0bb-11b7-4a03-a410-e6d863cde034",
   "metadata": {},
   "outputs": [],
   "source": [
    "from glob import glob\n",
    "import pandas as pd\n",
    "from scipy.stats import mannwhitneyu, fisher_exact\n",
    "from statsmodels.sandbox.stats.multicomp import multipletests\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "from skbio.stats.composition import clr\n",
    "\n",
    "sns.set_style('whitegrid')\n",
    "\n",
    "def p_adjust(pvalues, method='fdr_bh'):\n",
    "    res = multipletests(pvalues, method=method)\n",
    "    return np.array(res[1], dtype=float)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "563ad34e-841c-49af-888b-d69862313065",
   "metadata": {},
   "source": [
    "# Correlate proteomics data with median titers\n",
    "##### 7/18/22\n",
    "##### Michael Shaffer\n",
    "##### Merck ESC, Sys bio group\n",
    "\n",
    "After some success with KOs and more success with metabolomics we decided to go into proteomics. Same exact approach as in metabolomics."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "18d883c6-2a81-4fd5-a947-89fd24c6fa12",
   "metadata": {},
   "source": [
    "## Read in the data\n",
    "\n",
    "This is a data sheet that I got directly from Hendrik."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c72a493d-1ca8-4e8a-b515-e49024f0c039",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>BioID</th>\n",
       "      <th>Well</th>\n",
       "      <th>Run Number</th>\n",
       "      <th>Hospital</th>\n",
       "      <th>VisitID</th>\n",
       "      <th>Age</th>\n",
       "      <th>Draw</th>\n",
       "      <th>Matrix</th>\n",
       "      <th>Draw_Matrix</th>\n",
       "      <th>Draw_Matrix_Time</th>\n",
       "      <th>...</th>\n",
       "      <th>sp|Q6UWP8|SBSN_HUMAN</th>\n",
       "      <th>sp|Q6UXB8|PI16_HUMAN</th>\n",
       "      <th>sp|Q92736|RYR2_HUMAN</th>\n",
       "      <th>sp|Q96IY4|CBPB2_HUMAN</th>\n",
       "      <th>sp|Q96PD5|PGRP2_HUMAN</th>\n",
       "      <th>sp|Q9HDC9|APMAP_HUMAN</th>\n",
       "      <th>sp|Q9NZP8|C1RL_HUMAN</th>\n",
       "      <th>sp|Q9UBP9|GULP1_HUMAN</th>\n",
       "      <th>sp|Q9UGM5|FETUB_HUMAN</th>\n",
       "      <th>sp|Q9Y490|TLN1_HUMAN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>LFQ intensity 005_HFX_HW_RAW_IMC_A1_106A</th>\n",
       "      <td>106A</td>\n",
       "      <td>A1</td>\n",
       "      <td>5</td>\n",
       "      <td>Well Check 02m</td>\n",
       "      <td>V5</td>\n",
       "      <td>63</td>\n",
       "      <td>Heelstick</td>\n",
       "      <td>Serum</td>\n",
       "      <td>Heelstick_Serum</td>\n",
       "      <td>V5_Heelstick_Serum</td>\n",
       "      <td>...</td>\n",
       "      <td>18.2125721</td>\n",
       "      <td>23.48308372</td>\n",
       "      <td>20.45380974</td>\n",
       "      <td>21.819561</td>\n",
       "      <td>25.08229065</td>\n",
       "      <td>20.85553551</td>\n",
       "      <td>20.4588604</td>\n",
       "      <td>28.83514977</td>\n",
       "      <td>21.02322769</td>\n",
       "      <td>20.34404564</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LFQ intensity 006_HFX_HW_RAW_IMC_A2_209A</th>\n",
       "      <td>209A</td>\n",
       "      <td>A2</td>\n",
       "      <td>6</td>\n",
       "      <td>Well Check 02m</td>\n",
       "      <td>V5</td>\n",
       "      <td>55</td>\n",
       "      <td>Heelstick</td>\n",
       "      <td>Serum</td>\n",
       "      <td>Heelstick_Serum</td>\n",
       "      <td>V5_Heelstick_Serum</td>\n",
       "      <td>...</td>\n",
       "      <td>16.90970612</td>\n",
       "      <td>23.31561852</td>\n",
       "      <td>18.60210037</td>\n",
       "      <td>22.21127701</td>\n",
       "      <td>24.88887978</td>\n",
       "      <td>20.59141159</td>\n",
       "      <td>17.14304733</td>\n",
       "      <td>28.68237686</td>\n",
       "      <td>21.82271576</td>\n",
       "      <td>19.7013855</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LFQ intensity 012_HFX_HW_RAW_IMC_A8_214A</th>\n",
       "      <td>214A</td>\n",
       "      <td>A8</td>\n",
       "      <td>12</td>\n",
       "      <td>Well Check 02m</td>\n",
       "      <td>V5</td>\n",
       "      <td>89</td>\n",
       "      <td>Heelstick</td>\n",
       "      <td>Serum</td>\n",
       "      <td>Heelstick_Serum</td>\n",
       "      <td>V5_Heelstick_Serum</td>\n",
       "      <td>...</td>\n",
       "      <td>13.34554386</td>\n",
       "      <td>23.58817863</td>\n",
       "      <td>18.65951729</td>\n",
       "      <td>21.8640461</td>\n",
       "      <td>25.16353798</td>\n",
       "      <td>21.04162788</td>\n",
       "      <td>18.74650764</td>\n",
       "      <td>29.10836601</td>\n",
       "      <td>19.95448685</td>\n",
       "      <td>20.4085598</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LFQ intensity 021_HFX_HW_RAW_IMC_C3_227A</th>\n",
       "      <td>227A</td>\n",
       "      <td>C3</td>\n",
       "      <td>21</td>\n",
       "      <td>Well Check 02m</td>\n",
       "      <td>V5</td>\n",
       "      <td>57</td>\n",
       "      <td>Heelstick</td>\n",
       "      <td>Serum</td>\n",
       "      <td>Heelstick_Serum</td>\n",
       "      <td>V5_Heelstick_Serum</td>\n",
       "      <td>...</td>\n",
       "      <td>20.80704689</td>\n",
       "      <td>23.25824928</td>\n",
       "      <td>21.18655968</td>\n",
       "      <td>21.24667168</td>\n",
       "      <td>25.07989311</td>\n",
       "      <td>19.46746445</td>\n",
       "      <td>20.6216526</td>\n",
       "      <td>28.35871315</td>\n",
       "      <td>20.65556526</td>\n",
       "      <td>20.66396141</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LFQ intensity 024_HFX_HW_RAW_IMC_C6_124A</th>\n",
       "      <td>124A</td>\n",
       "      <td>C6</td>\n",
       "      <td>24</td>\n",
       "      <td>Well Check 02m</td>\n",
       "      <td>V5</td>\n",
       "      <td>97</td>\n",
       "      <td>Heelstick</td>\n",
       "      <td>Serum</td>\n",
       "      <td>Heelstick_Serum</td>\n",
       "      <td>V5_Heelstick_Serum</td>\n",
       "      <td>...</td>\n",
       "      <td>17.50129318</td>\n",
       "      <td>23.05339622</td>\n",
       "      <td>21.60560417</td>\n",
       "      <td>21.72242355</td>\n",
       "      <td>25.20258331</td>\n",
       "      <td>19.91134644</td>\n",
       "      <td>21.13340378</td>\n",
       "      <td>28.25678444</td>\n",
       "      <td>20.77088356</td>\n",
       "      <td>20.50154495</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 267 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                         BioID Well Run Number  \\\n",
       "LFQ intensity 005_HFX_HW_RAW_IMC_A1_106A  106A   A1          5   \n",
       "LFQ intensity 006_HFX_HW_RAW_IMC_A2_209A  209A   A2          6   \n",
       "LFQ intensity 012_HFX_HW_RAW_IMC_A8_214A  214A   A8         12   \n",
       "LFQ intensity 021_HFX_HW_RAW_IMC_C3_227A  227A   C3         21   \n",
       "LFQ intensity 024_HFX_HW_RAW_IMC_C6_124A  124A   C6         24   \n",
       "\n",
       "                                                Hospital VisitID Age  \\\n",
       "LFQ intensity 005_HFX_HW_RAW_IMC_A1_106A  Well Check 02m      V5  63   \n",
       "LFQ intensity 006_HFX_HW_RAW_IMC_A2_209A  Well Check 02m      V5  55   \n",
       "LFQ intensity 012_HFX_HW_RAW_IMC_A8_214A  Well Check 02m      V5  89   \n",
       "LFQ intensity 021_HFX_HW_RAW_IMC_C3_227A  Well Check 02m      V5  57   \n",
       "LFQ intensity 024_HFX_HW_RAW_IMC_C6_124A  Well Check 02m      V5  97   \n",
       "\n",
       "                                               Draw Matrix      Draw_Matrix  \\\n",
       "LFQ intensity 005_HFX_HW_RAW_IMC_A1_106A  Heelstick  Serum  Heelstick_Serum   \n",
       "LFQ intensity 006_HFX_HW_RAW_IMC_A2_209A  Heelstick  Serum  Heelstick_Serum   \n",
       "LFQ intensity 012_HFX_HW_RAW_IMC_A8_214A  Heelstick  Serum  Heelstick_Serum   \n",
       "LFQ intensity 021_HFX_HW_RAW_IMC_C3_227A  Heelstick  Serum  Heelstick_Serum   \n",
       "LFQ intensity 024_HFX_HW_RAW_IMC_C6_124A  Heelstick  Serum  Heelstick_Serum   \n",
       "\n",
       "                                            Draw_Matrix_Time  ...  \\\n",
       "LFQ intensity 005_HFX_HW_RAW_IMC_A1_106A  V5_Heelstick_Serum  ...   \n",
       "LFQ intensity 006_HFX_HW_RAW_IMC_A2_209A  V5_Heelstick_Serum  ...   \n",
       "LFQ intensity 012_HFX_HW_RAW_IMC_A8_214A  V5_Heelstick_Serum  ...   \n",
       "LFQ intensity 021_HFX_HW_RAW_IMC_C3_227A  V5_Heelstick_Serum  ...   \n",
       "LFQ intensity 024_HFX_HW_RAW_IMC_C6_124A  V5_Heelstick_Serum  ...   \n",
       "\n",
       "                                         sp|Q6UWP8|SBSN_HUMAN  \\\n",
       "LFQ intensity 005_HFX_HW_RAW_IMC_A1_106A           18.2125721   \n",
       "LFQ intensity 006_HFX_HW_RAW_IMC_A2_209A          16.90970612   \n",
       "LFQ intensity 012_HFX_HW_RAW_IMC_A8_214A          13.34554386   \n",
       "LFQ intensity 021_HFX_HW_RAW_IMC_C3_227A          20.80704689   \n",
       "LFQ intensity 024_HFX_HW_RAW_IMC_C6_124A          17.50129318   \n",
       "\n",
       "                                         sp|Q6UXB8|PI16_HUMAN  \\\n",
       "LFQ intensity 005_HFX_HW_RAW_IMC_A1_106A          23.48308372   \n",
       "LFQ intensity 006_HFX_HW_RAW_IMC_A2_209A          23.31561852   \n",
       "LFQ intensity 012_HFX_HW_RAW_IMC_A8_214A          23.58817863   \n",
       "LFQ intensity 021_HFX_HW_RAW_IMC_C3_227A          23.25824928   \n",
       "LFQ intensity 024_HFX_HW_RAW_IMC_C6_124A          23.05339622   \n",
       "\n",
       "                                         sp|Q92736|RYR2_HUMAN  \\\n",
       "LFQ intensity 005_HFX_HW_RAW_IMC_A1_106A          20.45380974   \n",
       "LFQ intensity 006_HFX_HW_RAW_IMC_A2_209A          18.60210037   \n",
       "LFQ intensity 012_HFX_HW_RAW_IMC_A8_214A          18.65951729   \n",
       "LFQ intensity 021_HFX_HW_RAW_IMC_C3_227A          21.18655968   \n",
       "LFQ intensity 024_HFX_HW_RAW_IMC_C6_124A          21.60560417   \n",
       "\n",
       "                                         sp|Q96IY4|CBPB2_HUMAN  \\\n",
       "LFQ intensity 005_HFX_HW_RAW_IMC_A1_106A             21.819561   \n",
       "LFQ intensity 006_HFX_HW_RAW_IMC_A2_209A           22.21127701   \n",
       "LFQ intensity 012_HFX_HW_RAW_IMC_A8_214A            21.8640461   \n",
       "LFQ intensity 021_HFX_HW_RAW_IMC_C3_227A           21.24667168   \n",
       "LFQ intensity 024_HFX_HW_RAW_IMC_C6_124A           21.72242355   \n",
       "\n",
       "                                         sp|Q96PD5|PGRP2_HUMAN  \\\n",
       "LFQ intensity 005_HFX_HW_RAW_IMC_A1_106A           25.08229065   \n",
       "LFQ intensity 006_HFX_HW_RAW_IMC_A2_209A           24.88887978   \n",
       "LFQ intensity 012_HFX_HW_RAW_IMC_A8_214A           25.16353798   \n",
       "LFQ intensity 021_HFX_HW_RAW_IMC_C3_227A           25.07989311   \n",
       "LFQ intensity 024_HFX_HW_RAW_IMC_C6_124A           25.20258331   \n",
       "\n",
       "                                         sp|Q9HDC9|APMAP_HUMAN  \\\n",
       "LFQ intensity 005_HFX_HW_RAW_IMC_A1_106A           20.85553551   \n",
       "LFQ intensity 006_HFX_HW_RAW_IMC_A2_209A           20.59141159   \n",
       "LFQ intensity 012_HFX_HW_RAW_IMC_A8_214A           21.04162788   \n",
       "LFQ intensity 021_HFX_HW_RAW_IMC_C3_227A           19.46746445   \n",
       "LFQ intensity 024_HFX_HW_RAW_IMC_C6_124A           19.91134644   \n",
       "\n",
       "                                         sp|Q9NZP8|C1RL_HUMAN  \\\n",
       "LFQ intensity 005_HFX_HW_RAW_IMC_A1_106A           20.4588604   \n",
       "LFQ intensity 006_HFX_HW_RAW_IMC_A2_209A          17.14304733   \n",
       "LFQ intensity 012_HFX_HW_RAW_IMC_A8_214A          18.74650764   \n",
       "LFQ intensity 021_HFX_HW_RAW_IMC_C3_227A           20.6216526   \n",
       "LFQ intensity 024_HFX_HW_RAW_IMC_C6_124A          21.13340378   \n",
       "\n",
       "                                         sp|Q9UBP9|GULP1_HUMAN  \\\n",
       "LFQ intensity 005_HFX_HW_RAW_IMC_A1_106A           28.83514977   \n",
       "LFQ intensity 006_HFX_HW_RAW_IMC_A2_209A           28.68237686   \n",
       "LFQ intensity 012_HFX_HW_RAW_IMC_A8_214A           29.10836601   \n",
       "LFQ intensity 021_HFX_HW_RAW_IMC_C3_227A           28.35871315   \n",
       "LFQ intensity 024_HFX_HW_RAW_IMC_C6_124A           28.25678444   \n",
       "\n",
       "                                         sp|Q9UGM5|FETUB_HUMAN  \\\n",
       "LFQ intensity 005_HFX_HW_RAW_IMC_A1_106A           21.02322769   \n",
       "LFQ intensity 006_HFX_HW_RAW_IMC_A2_209A           21.82271576   \n",
       "LFQ intensity 012_HFX_HW_RAW_IMC_A8_214A           19.95448685   \n",
       "LFQ intensity 021_HFX_HW_RAW_IMC_C3_227A           20.65556526   \n",
       "LFQ intensity 024_HFX_HW_RAW_IMC_C6_124A           20.77088356   \n",
       "\n",
       "                                         sp|Q9Y490|TLN1_HUMAN  \n",
       "LFQ intensity 005_HFX_HW_RAW_IMC_A1_106A          20.34404564  \n",
       "LFQ intensity 006_HFX_HW_RAW_IMC_A2_209A           19.7013855  \n",
       "LFQ intensity 012_HFX_HW_RAW_IMC_A8_214A           20.4085598  \n",
       "LFQ intensity 021_HFX_HW_RAW_IMC_C3_227A          20.66396141  \n",
       "LFQ intensity 024_HFX_HW_RAW_IMC_C6_124A          20.50154495  \n",
       "\n",
       "[5 rows x 267 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "raw_proteomics = pd.read_csv('../../data/proteomics_abunds.txt', sep='\\t', index_col=0).transpose()\n",
    "raw_proteomics.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bd4f4318-0563-4005-90a1-305218f35529",
   "metadata": {},
   "source": [
    "This is removing all columns that doen't have abundance information, taking only unique ID from row names and converting to floats."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d705c47d-572a-4f97-98c3-39e9127139f6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CON__P00761</th>\n",
       "      <th>sp|P02533|K1C14_HUMAN;CON__P02533</th>\n",
       "      <th>sp|P02768|ALBU_HUMAN;CON__P02768-1</th>\n",
       "      <th>CON__P02769</th>\n",
       "      <th>sp|P48668|K2C6C_HUMAN;CON__P48668;CON__P04259</th>\n",
       "      <th>sp|P13645|K1C10_HUMAN;CON__P13645</th>\n",
       "      <th>sp|P13647|K2C5_HUMAN;CON__P13647</th>\n",
       "      <th>CON__P35908;CON__P35908v2;sp|P35908|K22E_HUMAN</th>\n",
       "      <th>sp|Q86YZ3|HORN_HUMAN;CON__Q86YZ3</th>\n",
       "      <th>sp|A0A075B6H9|LV469_HUMAN</th>\n",
       "      <th>...</th>\n",
       "      <th>sp|Q6UWP8|SBSN_HUMAN</th>\n",
       "      <th>sp|Q6UXB8|PI16_HUMAN</th>\n",
       "      <th>sp|Q92736|RYR2_HUMAN</th>\n",
       "      <th>sp|Q96IY4|CBPB2_HUMAN</th>\n",
       "      <th>sp|Q96PD5|PGRP2_HUMAN</th>\n",
       "      <th>sp|Q9HDC9|APMAP_HUMAN</th>\n",
       "      <th>sp|Q9NZP8|C1RL_HUMAN</th>\n",
       "      <th>sp|Q9UBP9|GULP1_HUMAN</th>\n",
       "      <th>sp|Q9UGM5|FETUB_HUMAN</th>\n",
       "      <th>sp|Q9Y490|TLN1_HUMAN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>005_HFX_HW_RAW_IMC_A1_106A</th>\n",
       "      <td>29.414034</td>\n",
       "      <td>19.480482</td>\n",
       "      <td>33.365807</td>\n",
       "      <td>23.197741</td>\n",
       "      <td>21.572432</td>\n",
       "      <td>21.094681</td>\n",
       "      <td>19.310707</td>\n",
       "      <td>19.288029</td>\n",
       "      <td>19.483946</td>\n",
       "      <td>21.265715</td>\n",
       "      <td>...</td>\n",
       "      <td>18.212572</td>\n",
       "      <td>23.483084</td>\n",
       "      <td>20.453810</td>\n",
       "      <td>21.819561</td>\n",
       "      <td>25.082291</td>\n",
       "      <td>20.855536</td>\n",
       "      <td>20.458860</td>\n",
       "      <td>28.835150</td>\n",
       "      <td>21.023228</td>\n",
       "      <td>20.344046</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>006_HFX_HW_RAW_IMC_A2_209A</th>\n",
       "      <td>29.336454</td>\n",
       "      <td>19.774490</td>\n",
       "      <td>32.956917</td>\n",
       "      <td>23.397673</td>\n",
       "      <td>21.685078</td>\n",
       "      <td>22.188005</td>\n",
       "      <td>20.725851</td>\n",
       "      <td>20.871471</td>\n",
       "      <td>19.841579</td>\n",
       "      <td>20.858858</td>\n",
       "      <td>...</td>\n",
       "      <td>16.909706</td>\n",
       "      <td>23.315619</td>\n",
       "      <td>18.602100</td>\n",
       "      <td>22.211277</td>\n",
       "      <td>24.888880</td>\n",
       "      <td>20.591412</td>\n",
       "      <td>17.143047</td>\n",
       "      <td>28.682377</td>\n",
       "      <td>21.822716</td>\n",
       "      <td>19.701386</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>012_HFX_HW_RAW_IMC_A8_214A</th>\n",
       "      <td>28.906614</td>\n",
       "      <td>19.529310</td>\n",
       "      <td>33.174339</td>\n",
       "      <td>24.401390</td>\n",
       "      <td>19.594507</td>\n",
       "      <td>22.131947</td>\n",
       "      <td>19.528219</td>\n",
       "      <td>20.954256</td>\n",
       "      <td>18.534006</td>\n",
       "      <td>21.559875</td>\n",
       "      <td>...</td>\n",
       "      <td>13.345544</td>\n",
       "      <td>23.588179</td>\n",
       "      <td>18.659517</td>\n",
       "      <td>21.864046</td>\n",
       "      <td>25.163538</td>\n",
       "      <td>21.041628</td>\n",
       "      <td>18.746508</td>\n",
       "      <td>29.108366</td>\n",
       "      <td>19.954487</td>\n",
       "      <td>20.408560</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>021_HFX_HW_RAW_IMC_C3_227A</th>\n",
       "      <td>29.769306</td>\n",
       "      <td>21.061234</td>\n",
       "      <td>33.328308</td>\n",
       "      <td>24.742115</td>\n",
       "      <td>20.643036</td>\n",
       "      <td>20.368902</td>\n",
       "      <td>20.235897</td>\n",
       "      <td>17.218355</td>\n",
       "      <td>19.047693</td>\n",
       "      <td>21.543921</td>\n",
       "      <td>...</td>\n",
       "      <td>20.807047</td>\n",
       "      <td>23.258249</td>\n",
       "      <td>21.186560</td>\n",
       "      <td>21.246672</td>\n",
       "      <td>25.079893</td>\n",
       "      <td>19.467464</td>\n",
       "      <td>20.621653</td>\n",
       "      <td>28.358713</td>\n",
       "      <td>20.655565</td>\n",
       "      <td>20.663961</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>024_HFX_HW_RAW_IMC_C6_124A</th>\n",
       "      <td>29.567360</td>\n",
       "      <td>20.363079</td>\n",
       "      <td>32.930107</td>\n",
       "      <td>23.502518</td>\n",
       "      <td>20.780497</td>\n",
       "      <td>19.634420</td>\n",
       "      <td>20.836931</td>\n",
       "      <td>17.906363</td>\n",
       "      <td>18.741560</td>\n",
       "      <td>22.351000</td>\n",
       "      <td>...</td>\n",
       "      <td>17.501293</td>\n",
       "      <td>23.053396</td>\n",
       "      <td>21.605604</td>\n",
       "      <td>21.722424</td>\n",
       "      <td>25.202583</td>\n",
       "      <td>19.911346</td>\n",
       "      <td>21.133404</td>\n",
       "      <td>28.256784</td>\n",
       "      <td>20.770884</td>\n",
       "      <td>20.501545</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 250 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                            CON__P00761  sp|P02533|K1C14_HUMAN;CON__P02533  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A    29.414034                          19.480482   \n",
       "006_HFX_HW_RAW_IMC_A2_209A    29.336454                          19.774490   \n",
       "012_HFX_HW_RAW_IMC_A8_214A    28.906614                          19.529310   \n",
       "021_HFX_HW_RAW_IMC_C3_227A    29.769306                          21.061234   \n",
       "024_HFX_HW_RAW_IMC_C6_124A    29.567360                          20.363079   \n",
       "\n",
       "                            sp|P02768|ALBU_HUMAN;CON__P02768-1  CON__P02769  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                           33.365807    23.197741   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                           32.956917    23.397673   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                           33.174339    24.401390   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                           33.328308    24.742115   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                           32.930107    23.502518   \n",
       "\n",
       "                            sp|P48668|K2C6C_HUMAN;CON__P48668;CON__P04259  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                                      21.572432   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                                      21.685078   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                                      19.594507   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                                      20.643036   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                                      20.780497   \n",
       "\n",
       "                            sp|P13645|K1C10_HUMAN;CON__P13645  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                          21.094681   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                          22.188005   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                          22.131947   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                          20.368902   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                          19.634420   \n",
       "\n",
       "                            sp|P13647|K2C5_HUMAN;CON__P13647  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                         19.310707   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                         20.725851   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                         19.528219   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                         20.235897   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                         20.836931   \n",
       "\n",
       "                            CON__P35908;CON__P35908v2;sp|P35908|K22E_HUMAN  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                                       19.288029   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                                       20.871471   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                                       20.954256   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                                       17.218355   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                                       17.906363   \n",
       "\n",
       "                            sp|Q86YZ3|HORN_HUMAN;CON__Q86YZ3  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                         19.483946   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                         19.841579   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                         18.534006   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                         19.047693   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                         18.741560   \n",
       "\n",
       "                            sp|A0A075B6H9|LV469_HUMAN  ...  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                  21.265715  ...   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                  20.858858  ...   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                  21.559875  ...   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                  21.543921  ...   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                  22.351000  ...   \n",
       "\n",
       "                            sp|Q6UWP8|SBSN_HUMAN  sp|Q6UXB8|PI16_HUMAN  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A             18.212572             23.483084   \n",
       "006_HFX_HW_RAW_IMC_A2_209A             16.909706             23.315619   \n",
       "012_HFX_HW_RAW_IMC_A8_214A             13.345544             23.588179   \n",
       "021_HFX_HW_RAW_IMC_C3_227A             20.807047             23.258249   \n",
       "024_HFX_HW_RAW_IMC_C6_124A             17.501293             23.053396   \n",
       "\n",
       "                            sp|Q92736|RYR2_HUMAN  sp|Q96IY4|CBPB2_HUMAN  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A             20.453810              21.819561   \n",
       "006_HFX_HW_RAW_IMC_A2_209A             18.602100              22.211277   \n",
       "012_HFX_HW_RAW_IMC_A8_214A             18.659517              21.864046   \n",
       "021_HFX_HW_RAW_IMC_C3_227A             21.186560              21.246672   \n",
       "024_HFX_HW_RAW_IMC_C6_124A             21.605604              21.722424   \n",
       "\n",
       "                            sp|Q96PD5|PGRP2_HUMAN  sp|Q9HDC9|APMAP_HUMAN  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A              25.082291              20.855536   \n",
       "006_HFX_HW_RAW_IMC_A2_209A              24.888880              20.591412   \n",
       "012_HFX_HW_RAW_IMC_A8_214A              25.163538              21.041628   \n",
       "021_HFX_HW_RAW_IMC_C3_227A              25.079893              19.467464   \n",
       "024_HFX_HW_RAW_IMC_C6_124A              25.202583              19.911346   \n",
       "\n",
       "                            sp|Q9NZP8|C1RL_HUMAN  sp|Q9UBP9|GULP1_HUMAN  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A             20.458860              28.835150   \n",
       "006_HFX_HW_RAW_IMC_A2_209A             17.143047              28.682377   \n",
       "012_HFX_HW_RAW_IMC_A8_214A             18.746508              29.108366   \n",
       "021_HFX_HW_RAW_IMC_C3_227A             20.621653              28.358713   \n",
       "024_HFX_HW_RAW_IMC_C6_124A             21.133404              28.256784   \n",
       "\n",
       "                            sp|Q9UGM5|FETUB_HUMAN  sp|Q9Y490|TLN1_HUMAN  \n",
       "005_HFX_HW_RAW_IMC_A1_106A              21.023228             20.344046  \n",
       "006_HFX_HW_RAW_IMC_A2_209A              21.822716             19.701386  \n",
       "012_HFX_HW_RAW_IMC_A8_214A              19.954487             20.408560  \n",
       "021_HFX_HW_RAW_IMC_C3_227A              20.655565             20.663961  \n",
       "024_HFX_HW_RAW_IMC_C6_124A              20.770884             20.501545  \n",
       "\n",
       "[5 rows x 250 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "proteomics = raw_proteomics.loc[[i for i in raw_proteomics.index if i.startswith('LFQ intensity')],\n",
    "                                raw_proteomics.columns[list(raw_proteomics.columns).index('Subject ID')+1:]]\n",
    "proteomics.index = [i.split()[-1] for i in proteomics.index]\n",
    "proteomics = proteomics.astype(float)\n",
    "proteomics.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "9f89924d-3480-49b9-ae04-d5f820295de1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "250\n"
     ]
    }
   ],
   "source": [
    "print(len(proteomics.columns))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f613af37-06ef-4f8e-90e5-51702fc6ac88",
   "metadata": {},
   "source": [
    "250 proteins measured.\n",
    "\n",
    "Convert the abundances to relative abundance for comparison of normalizations."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "4bfeee8c-6683-4328-af45-2a4716107958",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CON__P00761</th>\n",
       "      <th>sp|P02533|K1C14_HUMAN;CON__P02533</th>\n",
       "      <th>sp|P02768|ALBU_HUMAN;CON__P02768-1</th>\n",
       "      <th>CON__P02769</th>\n",
       "      <th>sp|P48668|K2C6C_HUMAN;CON__P48668;CON__P04259</th>\n",
       "      <th>sp|P13645|K1C10_HUMAN;CON__P13645</th>\n",
       "      <th>sp|P13647|K2C5_HUMAN;CON__P13647</th>\n",
       "      <th>CON__P35908;CON__P35908v2;sp|P35908|K22E_HUMAN</th>\n",
       "      <th>sp|Q86YZ3|HORN_HUMAN;CON__Q86YZ3</th>\n",
       "      <th>sp|A0A075B6H9|LV469_HUMAN</th>\n",
       "      <th>...</th>\n",
       "      <th>sp|Q6UWP8|SBSN_HUMAN</th>\n",
       "      <th>sp|Q6UXB8|PI16_HUMAN</th>\n",
       "      <th>sp|Q92736|RYR2_HUMAN</th>\n",
       "      <th>sp|Q96IY4|CBPB2_HUMAN</th>\n",
       "      <th>sp|Q96PD5|PGRP2_HUMAN</th>\n",
       "      <th>sp|Q9HDC9|APMAP_HUMAN</th>\n",
       "      <th>sp|Q9NZP8|C1RL_HUMAN</th>\n",
       "      <th>sp|Q9UBP9|GULP1_HUMAN</th>\n",
       "      <th>sp|Q9UGM5|FETUB_HUMAN</th>\n",
       "      <th>sp|Q9Y490|TLN1_HUMAN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>005_HFX_HW_RAW_IMC_A1_106A</th>\n",
       "      <td>0.005043</td>\n",
       "      <td>0.003340</td>\n",
       "      <td>0.005721</td>\n",
       "      <td>0.003977</td>\n",
       "      <td>0.003699</td>\n",
       "      <td>0.003617</td>\n",
       "      <td>0.003311</td>\n",
       "      <td>0.003307</td>\n",
       "      <td>0.003341</td>\n",
       "      <td>0.003646</td>\n",
       "      <td>...</td>\n",
       "      <td>0.003123</td>\n",
       "      <td>0.004026</td>\n",
       "      <td>0.003507</td>\n",
       "      <td>0.003741</td>\n",
       "      <td>0.004301</td>\n",
       "      <td>0.003576</td>\n",
       "      <td>0.003508</td>\n",
       "      <td>0.004944</td>\n",
       "      <td>0.003605</td>\n",
       "      <td>0.003488</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>006_HFX_HW_RAW_IMC_A2_209A</th>\n",
       "      <td>0.005066</td>\n",
       "      <td>0.003415</td>\n",
       "      <td>0.005691</td>\n",
       "      <td>0.004041</td>\n",
       "      <td>0.003745</td>\n",
       "      <td>0.003832</td>\n",
       "      <td>0.003579</td>\n",
       "      <td>0.003604</td>\n",
       "      <td>0.003426</td>\n",
       "      <td>0.003602</td>\n",
       "      <td>...</td>\n",
       "      <td>0.002920</td>\n",
       "      <td>0.004026</td>\n",
       "      <td>0.003212</td>\n",
       "      <td>0.003836</td>\n",
       "      <td>0.004298</td>\n",
       "      <td>0.003556</td>\n",
       "      <td>0.002960</td>\n",
       "      <td>0.004953</td>\n",
       "      <td>0.003769</td>\n",
       "      <td>0.003402</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>012_HFX_HW_RAW_IMC_A8_214A</th>\n",
       "      <td>0.005009</td>\n",
       "      <td>0.003384</td>\n",
       "      <td>0.005749</td>\n",
       "      <td>0.004229</td>\n",
       "      <td>0.003396</td>\n",
       "      <td>0.003835</td>\n",
       "      <td>0.003384</td>\n",
       "      <td>0.003631</td>\n",
       "      <td>0.003212</td>\n",
       "      <td>0.003736</td>\n",
       "      <td>...</td>\n",
       "      <td>0.002313</td>\n",
       "      <td>0.004088</td>\n",
       "      <td>0.003234</td>\n",
       "      <td>0.003789</td>\n",
       "      <td>0.004361</td>\n",
       "      <td>0.003646</td>\n",
       "      <td>0.003249</td>\n",
       "      <td>0.005044</td>\n",
       "      <td>0.003458</td>\n",
       "      <td>0.003537</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>021_HFX_HW_RAW_IMC_C3_227A</th>\n",
       "      <td>0.005104</td>\n",
       "      <td>0.003611</td>\n",
       "      <td>0.005715</td>\n",
       "      <td>0.004242</td>\n",
       "      <td>0.003540</td>\n",
       "      <td>0.003493</td>\n",
       "      <td>0.003470</td>\n",
       "      <td>0.002952</td>\n",
       "      <td>0.003266</td>\n",
       "      <td>0.003694</td>\n",
       "      <td>...</td>\n",
       "      <td>0.003568</td>\n",
       "      <td>0.003988</td>\n",
       "      <td>0.003633</td>\n",
       "      <td>0.003643</td>\n",
       "      <td>0.004300</td>\n",
       "      <td>0.003338</td>\n",
       "      <td>0.003536</td>\n",
       "      <td>0.004862</td>\n",
       "      <td>0.003542</td>\n",
       "      <td>0.003543</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>024_HFX_HW_RAW_IMC_C6_124A</th>\n",
       "      <td>0.005069</td>\n",
       "      <td>0.003491</td>\n",
       "      <td>0.005646</td>\n",
       "      <td>0.004030</td>\n",
       "      <td>0.003563</td>\n",
       "      <td>0.003366</td>\n",
       "      <td>0.003573</td>\n",
       "      <td>0.003070</td>\n",
       "      <td>0.003213</td>\n",
       "      <td>0.003832</td>\n",
       "      <td>...</td>\n",
       "      <td>0.003001</td>\n",
       "      <td>0.003953</td>\n",
       "      <td>0.003704</td>\n",
       "      <td>0.003724</td>\n",
       "      <td>0.004321</td>\n",
       "      <td>0.003414</td>\n",
       "      <td>0.003623</td>\n",
       "      <td>0.004845</td>\n",
       "      <td>0.003561</td>\n",
       "      <td>0.003515</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 250 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                            CON__P00761  sp|P02533|K1C14_HUMAN;CON__P02533  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A     0.005043                           0.003340   \n",
       "006_HFX_HW_RAW_IMC_A2_209A     0.005066                           0.003415   \n",
       "012_HFX_HW_RAW_IMC_A8_214A     0.005009                           0.003384   \n",
       "021_HFX_HW_RAW_IMC_C3_227A     0.005104                           0.003611   \n",
       "024_HFX_HW_RAW_IMC_C6_124A     0.005069                           0.003491   \n",
       "\n",
       "                            sp|P02768|ALBU_HUMAN;CON__P02768-1  CON__P02769  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                            0.005721     0.003977   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                            0.005691     0.004041   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                            0.005749     0.004229   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                            0.005715     0.004242   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                            0.005646     0.004030   \n",
       "\n",
       "                            sp|P48668|K2C6C_HUMAN;CON__P48668;CON__P04259  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                                       0.003699   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                                       0.003745   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                                       0.003396   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                                       0.003540   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                                       0.003563   \n",
       "\n",
       "                            sp|P13645|K1C10_HUMAN;CON__P13645  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                           0.003617   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                           0.003832   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                           0.003835   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                           0.003493   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                           0.003366   \n",
       "\n",
       "                            sp|P13647|K2C5_HUMAN;CON__P13647  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                          0.003311   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                          0.003579   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                          0.003384   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                          0.003470   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                          0.003573   \n",
       "\n",
       "                            CON__P35908;CON__P35908v2;sp|P35908|K22E_HUMAN  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                                        0.003307   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                                        0.003604   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                                        0.003631   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                                        0.002952   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                                        0.003070   \n",
       "\n",
       "                            sp|Q86YZ3|HORN_HUMAN;CON__Q86YZ3  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                          0.003341   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                          0.003426   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                          0.003212   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                          0.003266   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                          0.003213   \n",
       "\n",
       "                            sp|A0A075B6H9|LV469_HUMAN  ...  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                   0.003646  ...   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                   0.003602  ...   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                   0.003736  ...   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                   0.003694  ...   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                   0.003832  ...   \n",
       "\n",
       "                            sp|Q6UWP8|SBSN_HUMAN  sp|Q6UXB8|PI16_HUMAN  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A              0.003123              0.004026   \n",
       "006_HFX_HW_RAW_IMC_A2_209A              0.002920              0.004026   \n",
       "012_HFX_HW_RAW_IMC_A8_214A              0.002313              0.004088   \n",
       "021_HFX_HW_RAW_IMC_C3_227A              0.003568              0.003988   \n",
       "024_HFX_HW_RAW_IMC_C6_124A              0.003001              0.003953   \n",
       "\n",
       "                            sp|Q92736|RYR2_HUMAN  sp|Q96IY4|CBPB2_HUMAN  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A              0.003507               0.003741   \n",
       "006_HFX_HW_RAW_IMC_A2_209A              0.003212               0.003836   \n",
       "012_HFX_HW_RAW_IMC_A8_214A              0.003234               0.003789   \n",
       "021_HFX_HW_RAW_IMC_C3_227A              0.003633               0.003643   \n",
       "024_HFX_HW_RAW_IMC_C6_124A              0.003704               0.003724   \n",
       "\n",
       "                            sp|Q96PD5|PGRP2_HUMAN  sp|Q9HDC9|APMAP_HUMAN  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A               0.004301               0.003576   \n",
       "006_HFX_HW_RAW_IMC_A2_209A               0.004298               0.003556   \n",
       "012_HFX_HW_RAW_IMC_A8_214A               0.004361               0.003646   \n",
       "021_HFX_HW_RAW_IMC_C3_227A               0.004300               0.003338   \n",
       "024_HFX_HW_RAW_IMC_C6_124A               0.004321               0.003414   \n",
       "\n",
       "                            sp|Q9NZP8|C1RL_HUMAN  sp|Q9UBP9|GULP1_HUMAN  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A              0.003508               0.004944   \n",
       "006_HFX_HW_RAW_IMC_A2_209A              0.002960               0.004953   \n",
       "012_HFX_HW_RAW_IMC_A8_214A              0.003249               0.005044   \n",
       "021_HFX_HW_RAW_IMC_C3_227A              0.003536               0.004862   \n",
       "024_HFX_HW_RAW_IMC_C6_124A              0.003623               0.004845   \n",
       "\n",
       "                            sp|Q9UGM5|FETUB_HUMAN  sp|Q9Y490|TLN1_HUMAN  \n",
       "005_HFX_HW_RAW_IMC_A1_106A               0.003605              0.003488  \n",
       "006_HFX_HW_RAW_IMC_A2_209A               0.003769              0.003402  \n",
       "012_HFX_HW_RAW_IMC_A8_214A               0.003458              0.003537  \n",
       "021_HFX_HW_RAW_IMC_C3_227A               0.003542              0.003543  \n",
       "024_HFX_HW_RAW_IMC_C6_124A               0.003561              0.003515  \n",
       "\n",
       "[5 rows x 250 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "proteomics_rel = proteomics.div(proteomics.sum(axis=1), axis=0)\n",
    "proteomics_rel.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b37ceaa6-d8bc-4251-9cd7-97d819e7da29",
   "metadata": {},
   "source": [
    "Same normalization idea, but using the centered log-ratio (CLR) transform instead of relative abundance."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "1ea0ee20-3191-494f-b8d8-b507fc763046",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CON__P00761</th>\n",
       "      <th>sp|P02533|K1C14_HUMAN;CON__P02533</th>\n",
       "      <th>sp|P02768|ALBU_HUMAN;CON__P02768-1</th>\n",
       "      <th>CON__P02769</th>\n",
       "      <th>sp|P48668|K2C6C_HUMAN;CON__P48668;CON__P04259</th>\n",
       "      <th>sp|P13645|K1C10_HUMAN;CON__P13645</th>\n",
       "      <th>sp|P13647|K2C5_HUMAN;CON__P13647</th>\n",
       "      <th>CON__P35908;CON__P35908v2;sp|P35908|K22E_HUMAN</th>\n",
       "      <th>sp|Q86YZ3|HORN_HUMAN;CON__Q86YZ3</th>\n",
       "      <th>sp|A0A075B6H9|LV469_HUMAN</th>\n",
       "      <th>...</th>\n",
       "      <th>sp|Q6UWP8|SBSN_HUMAN</th>\n",
       "      <th>sp|Q6UXB8|PI16_HUMAN</th>\n",
       "      <th>sp|Q92736|RYR2_HUMAN</th>\n",
       "      <th>sp|Q96IY4|CBPB2_HUMAN</th>\n",
       "      <th>sp|Q96PD5|PGRP2_HUMAN</th>\n",
       "      <th>sp|Q9HDC9|APMAP_HUMAN</th>\n",
       "      <th>sp|Q9NZP8|C1RL_HUMAN</th>\n",
       "      <th>sp|Q9UBP9|GULP1_HUMAN</th>\n",
       "      <th>sp|Q9UGM5|FETUB_HUMAN</th>\n",
       "      <th>sp|Q9Y490|TLN1_HUMAN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>005_HFX_HW_RAW_IMC_A1_106A</th>\n",
       "      <td>0.239511</td>\n",
       "      <td>-0.172531</td>\n",
       "      <td>0.365567</td>\n",
       "      <td>0.002103</td>\n",
       "      <td>-0.070532</td>\n",
       "      <td>-0.092927</td>\n",
       "      <td>-0.181283</td>\n",
       "      <td>-0.182458</td>\n",
       "      <td>-0.172353</td>\n",
       "      <td>-0.084852</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.239828</td>\n",
       "      <td>0.014328</td>\n",
       "      <td>-0.123777</td>\n",
       "      <td>-0.059142</td>\n",
       "      <td>0.080207</td>\n",
       "      <td>-0.104328</td>\n",
       "      <td>-0.123530</td>\n",
       "      <td>0.219635</td>\n",
       "      <td>-0.096319</td>\n",
       "      <td>-0.129157</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>006_HFX_HW_RAW_IMC_A2_209A</th>\n",
       "      <td>0.244758</td>\n",
       "      <td>-0.149664</td>\n",
       "      <td>0.361124</td>\n",
       "      <td>0.018572</td>\n",
       "      <td>-0.057437</td>\n",
       "      <td>-0.034510</td>\n",
       "      <td>-0.102677</td>\n",
       "      <td>-0.095676</td>\n",
       "      <td>-0.146277</td>\n",
       "      <td>-0.096280</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.306160</td>\n",
       "      <td>0.015059</td>\n",
       "      <td>-0.210779</td>\n",
       "      <td>-0.033462</td>\n",
       "      <td>0.080354</td>\n",
       "      <td>-0.109184</td>\n",
       "      <td>-0.292456</td>\n",
       "      <td>0.222211</td>\n",
       "      <td>-0.051110</td>\n",
       "      <td>-0.153367</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>012_HFX_HW_RAW_IMC_A8_214A</th>\n",
       "      <td>0.234427</td>\n",
       "      <td>-0.157710</td>\n",
       "      <td>0.372129</td>\n",
       "      <td>0.065003</td>\n",
       "      <td>-0.154378</td>\n",
       "      <td>-0.032611</td>\n",
       "      <td>-0.157766</td>\n",
       "      <td>-0.087288</td>\n",
       "      <td>-0.210017</td>\n",
       "      <td>-0.058797</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.538420</td>\n",
       "      <td>0.031110</td>\n",
       "      <td>-0.203268</td>\n",
       "      <td>-0.044789</td>\n",
       "      <td>0.095758</td>\n",
       "      <td>-0.083128</td>\n",
       "      <td>-0.198617</td>\n",
       "      <td>0.241382</td>\n",
       "      <td>-0.136174</td>\n",
       "      <td>-0.113674</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>021_HFX_HW_RAW_IMC_C3_227A</th>\n",
       "      <td>0.251985</td>\n",
       "      <td>-0.094045</td>\n",
       "      <td>0.364910</td>\n",
       "      <td>0.067021</td>\n",
       "      <td>-0.114100</td>\n",
       "      <td>-0.127468</td>\n",
       "      <td>-0.134019</td>\n",
       "      <td>-0.295493</td>\n",
       "      <td>-0.194528</td>\n",
       "      <td>-0.071387</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.106187</td>\n",
       "      <td>0.005176</td>\n",
       "      <td>-0.088113</td>\n",
       "      <td>-0.085279</td>\n",
       "      <td>0.080580</td>\n",
       "      <td>-0.172731</td>\n",
       "      <td>-0.115137</td>\n",
       "      <td>0.203443</td>\n",
       "      <td>-0.113494</td>\n",
       "      <td>-0.113087</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>024_HFX_HW_RAW_IMC_C6_124A</th>\n",
       "      <td>0.245111</td>\n",
       "      <td>-0.127821</td>\n",
       "      <td>0.352824</td>\n",
       "      <td>0.015557</td>\n",
       "      <td>-0.107531</td>\n",
       "      <td>-0.164258</td>\n",
       "      <td>-0.104819</td>\n",
       "      <td>-0.256382</td>\n",
       "      <td>-0.210797</td>\n",
       "      <td>-0.034678</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.279262</td>\n",
       "      <td>-0.003737</td>\n",
       "      <td>-0.068595</td>\n",
       "      <td>-0.063202</td>\n",
       "      <td>0.085393</td>\n",
       "      <td>-0.150254</td>\n",
       "      <td>-0.090691</td>\n",
       "      <td>0.199775</td>\n",
       "      <td>-0.107993</td>\n",
       "      <td>-0.121045</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 250 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                            CON__P00761  sp|P02533|K1C14_HUMAN;CON__P02533  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A     0.239511                          -0.172531   \n",
       "006_HFX_HW_RAW_IMC_A2_209A     0.244758                          -0.149664   \n",
       "012_HFX_HW_RAW_IMC_A8_214A     0.234427                          -0.157710   \n",
       "021_HFX_HW_RAW_IMC_C3_227A     0.251985                          -0.094045   \n",
       "024_HFX_HW_RAW_IMC_C6_124A     0.245111                          -0.127821   \n",
       "\n",
       "                            sp|P02768|ALBU_HUMAN;CON__P02768-1  CON__P02769  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                            0.365567     0.002103   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                            0.361124     0.018572   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                            0.372129     0.065003   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                            0.364910     0.067021   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                            0.352824     0.015557   \n",
       "\n",
       "                            sp|P48668|K2C6C_HUMAN;CON__P48668;CON__P04259  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                                      -0.070532   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                                      -0.057437   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                                      -0.154378   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                                      -0.114100   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                                      -0.107531   \n",
       "\n",
       "                            sp|P13645|K1C10_HUMAN;CON__P13645  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                          -0.092927   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                          -0.034510   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                          -0.032611   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                          -0.127468   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                          -0.164258   \n",
       "\n",
       "                            sp|P13647|K2C5_HUMAN;CON__P13647  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                         -0.181283   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                         -0.102677   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                         -0.157766   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                         -0.134019   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                         -0.104819   \n",
       "\n",
       "                            CON__P35908;CON__P35908v2;sp|P35908|K22E_HUMAN  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                                       -0.182458   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                                       -0.095676   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                                       -0.087288   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                                       -0.295493   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                                       -0.256382   \n",
       "\n",
       "                            sp|Q86YZ3|HORN_HUMAN;CON__Q86YZ3  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                         -0.172353   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                         -0.146277   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                         -0.210017   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                         -0.194528   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                         -0.210797   \n",
       "\n",
       "                            sp|A0A075B6H9|LV469_HUMAN  ...  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                  -0.084852  ...   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                  -0.096280  ...   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                  -0.058797  ...   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                  -0.071387  ...   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                  -0.034678  ...   \n",
       "\n",
       "                            sp|Q6UWP8|SBSN_HUMAN  sp|Q6UXB8|PI16_HUMAN  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A             -0.239828              0.014328   \n",
       "006_HFX_HW_RAW_IMC_A2_209A             -0.306160              0.015059   \n",
       "012_HFX_HW_RAW_IMC_A8_214A             -0.538420              0.031110   \n",
       "021_HFX_HW_RAW_IMC_C3_227A             -0.106187              0.005176   \n",
       "024_HFX_HW_RAW_IMC_C6_124A             -0.279262             -0.003737   \n",
       "\n",
       "                            sp|Q92736|RYR2_HUMAN  sp|Q96IY4|CBPB2_HUMAN  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A             -0.123777              -0.059142   \n",
       "006_HFX_HW_RAW_IMC_A2_209A             -0.210779              -0.033462   \n",
       "012_HFX_HW_RAW_IMC_A8_214A             -0.203268              -0.044789   \n",
       "021_HFX_HW_RAW_IMC_C3_227A             -0.088113              -0.085279   \n",
       "024_HFX_HW_RAW_IMC_C6_124A             -0.068595              -0.063202   \n",
       "\n",
       "                            sp|Q96PD5|PGRP2_HUMAN  sp|Q9HDC9|APMAP_HUMAN  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A               0.080207              -0.104328   \n",
       "006_HFX_HW_RAW_IMC_A2_209A               0.080354              -0.109184   \n",
       "012_HFX_HW_RAW_IMC_A8_214A               0.095758              -0.083128   \n",
       "021_HFX_HW_RAW_IMC_C3_227A               0.080580              -0.172731   \n",
       "024_HFX_HW_RAW_IMC_C6_124A               0.085393              -0.150254   \n",
       "\n",
       "                            sp|Q9NZP8|C1RL_HUMAN  sp|Q9UBP9|GULP1_HUMAN  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A             -0.123530               0.219635   \n",
       "006_HFX_HW_RAW_IMC_A2_209A             -0.292456               0.222211   \n",
       "012_HFX_HW_RAW_IMC_A8_214A             -0.198617               0.241382   \n",
       "021_HFX_HW_RAW_IMC_C3_227A             -0.115137               0.203443   \n",
       "024_HFX_HW_RAW_IMC_C6_124A             -0.090691               0.199775   \n",
       "\n",
       "                            sp|Q9UGM5|FETUB_HUMAN  sp|Q9Y490|TLN1_HUMAN  \n",
       "005_HFX_HW_RAW_IMC_A1_106A              -0.096319             -0.129157  \n",
       "006_HFX_HW_RAW_IMC_A2_209A              -0.051110             -0.153367  \n",
       "012_HFX_HW_RAW_IMC_A8_214A              -0.136174             -0.113674  \n",
       "021_HFX_HW_RAW_IMC_C3_227A              -0.113494             -0.113087  \n",
       "024_HFX_HW_RAW_IMC_C6_124A              -0.107993             -0.121045  \n",
       "\n",
       "[5 rows x 250 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "proteomics_clr = pd.DataFrame(clr(proteomics + .001), index=proteomics.index, columns=proteomics.columns)\n",
    "proteomics_clr.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9de5df7c-0421-438b-86dd-64766522594a",
   "metadata": {},
   "source": [
    "Now we will pull the metadata from the non-abundance columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "28f85a63-f3ce-469b-b252-ef279554d166",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>BioID</th>\n",
       "      <th>Well</th>\n",
       "      <th>Run Number</th>\n",
       "      <th>Hospital</th>\n",
       "      <th>VisitCode</th>\n",
       "      <th>Age</th>\n",
       "      <th>Draw</th>\n",
       "      <th>Matrix</th>\n",
       "      <th>Draw_Matrix</th>\n",
       "      <th>Draw_Matrix_Time</th>\n",
       "      <th>Plate</th>\n",
       "      <th>Responder Status</th>\n",
       "      <th>Group_ID</th>\n",
       "      <th>Group_ID_Responder</th>\n",
       "      <th>Responder Status NVRHVR</th>\n",
       "      <th>Group_ID_Responder2Groups</th>\n",
       "      <th>BabyN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>005_HFX_HW_RAW_IMC_A1_106A</th>\n",
       "      <td>106A</td>\n",
       "      <td>A1</td>\n",
       "      <td>5</td>\n",
       "      <td>Well Check 02m</td>\n",
       "      <td>V5</td>\n",
       "      <td>63</td>\n",
       "      <td>Heelstick</td>\n",
       "      <td>Serum</td>\n",
       "      <td>Heelstick_Serum</td>\n",
       "      <td>V5_Heelstick_Serum</td>\n",
       "      <td>1</td>\n",
       "      <td>LVR</td>\n",
       "      <td>V5_Heelstick_Serum_LVR</td>\n",
       "      <td>V5_Heelstick_Serum_LVR</td>\n",
       "      <td>NVR</td>\n",
       "      <td>V5_Heelstick_Serum_NVR</td>\n",
       "      <td>106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>006_HFX_HW_RAW_IMC_A2_209A</th>\n",
       "      <td>209A</td>\n",
       "      <td>A2</td>\n",
       "      <td>6</td>\n",
       "      <td>Well Check 02m</td>\n",
       "      <td>V5</td>\n",
       "      <td>55</td>\n",
       "      <td>Heelstick</td>\n",
       "      <td>Serum</td>\n",
       "      <td>Heelstick_Serum</td>\n",
       "      <td>V5_Heelstick_Serum</td>\n",
       "      <td>1</td>\n",
       "      <td>NVR</td>\n",
       "      <td>V5_Heelstick_Serum_NVR</td>\n",
       "      <td>V5_Heelstick_Serum_NVR</td>\n",
       "      <td>NVR</td>\n",
       "      <td>V5_Heelstick_Serum_NVR</td>\n",
       "      <td>209</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>012_HFX_HW_RAW_IMC_A8_214A</th>\n",
       "      <td>214A</td>\n",
       "      <td>A8</td>\n",
       "      <td>12</td>\n",
       "      <td>Well Check 02m</td>\n",
       "      <td>V5</td>\n",
       "      <td>89</td>\n",
       "      <td>Heelstick</td>\n",
       "      <td>Serum</td>\n",
       "      <td>Heelstick_Serum</td>\n",
       "      <td>V5_Heelstick_Serum</td>\n",
       "      <td>1</td>\n",
       "      <td>HVR</td>\n",
       "      <td>V5_Heelstick_Serum_HVR</td>\n",
       "      <td>V5_Heelstick_Serum_HVR</td>\n",
       "      <td>HVR</td>\n",
       "      <td>V5_Heelstick_Serum_HVR</td>\n",
       "      <td>214</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>021_HFX_HW_RAW_IMC_C3_227A</th>\n",
       "      <td>227A</td>\n",
       "      <td>C3</td>\n",
       "      <td>21</td>\n",
       "      <td>Well Check 02m</td>\n",
       "      <td>V5</td>\n",
       "      <td>57</td>\n",
       "      <td>Heelstick</td>\n",
       "      <td>Serum</td>\n",
       "      <td>Heelstick_Serum</td>\n",
       "      <td>V5_Heelstick_Serum</td>\n",
       "      <td>1</td>\n",
       "      <td>HVR</td>\n",
       "      <td>V5_Heelstick_Serum_HVR</td>\n",
       "      <td>V5_Heelstick_Serum_HVR</td>\n",
       "      <td>HVR</td>\n",
       "      <td>V5_Heelstick_Serum_HVR</td>\n",
       "      <td>226</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>024_HFX_HW_RAW_IMC_C6_124A</th>\n",
       "      <td>124A</td>\n",
       "      <td>C6</td>\n",
       "      <td>24</td>\n",
       "      <td>Well Check 02m</td>\n",
       "      <td>V5</td>\n",
       "      <td>97</td>\n",
       "      <td>Heelstick</td>\n",
       "      <td>Serum</td>\n",
       "      <td>Heelstick_Serum</td>\n",
       "      <td>V5_Heelstick_Serum</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>V5_Heelstick_Serum_NA</td>\n",
       "      <td>V5_Heelstick_Serum_NA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>V5_Heelstick_Serum_NA</td>\n",
       "      <td>124</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                           BioID Well Run Number        Hospital VisitCode  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A  106A   A1          5  Well Check 02m        V5   \n",
       "006_HFX_HW_RAW_IMC_A2_209A  209A   A2          6  Well Check 02m        V5   \n",
       "012_HFX_HW_RAW_IMC_A8_214A  214A   A8         12  Well Check 02m        V5   \n",
       "021_HFX_HW_RAW_IMC_C3_227A  227A   C3         21  Well Check 02m        V5   \n",
       "024_HFX_HW_RAW_IMC_C6_124A  124A   C6         24  Well Check 02m        V5   \n",
       "\n",
       "                           Age       Draw Matrix      Draw_Matrix  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A  63  Heelstick  Serum  Heelstick_Serum   \n",
       "006_HFX_HW_RAW_IMC_A2_209A  55  Heelstick  Serum  Heelstick_Serum   \n",
       "012_HFX_HW_RAW_IMC_A8_214A  89  Heelstick  Serum  Heelstick_Serum   \n",
       "021_HFX_HW_RAW_IMC_C3_227A  57  Heelstick  Serum  Heelstick_Serum   \n",
       "024_HFX_HW_RAW_IMC_C6_124A  97  Heelstick  Serum  Heelstick_Serum   \n",
       "\n",
       "                              Draw_Matrix_Time Plate Responder Status  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A  V5_Heelstick_Serum     1              LVR   \n",
       "006_HFX_HW_RAW_IMC_A2_209A  V5_Heelstick_Serum     1              NVR   \n",
       "012_HFX_HW_RAW_IMC_A8_214A  V5_Heelstick_Serum     1              HVR   \n",
       "021_HFX_HW_RAW_IMC_C3_227A  V5_Heelstick_Serum     1              HVR   \n",
       "024_HFX_HW_RAW_IMC_C6_124A  V5_Heelstick_Serum     1              NaN   \n",
       "\n",
       "                                          Group_ID      Group_ID_Responder  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A  V5_Heelstick_Serum_LVR  V5_Heelstick_Serum_LVR   \n",
       "006_HFX_HW_RAW_IMC_A2_209A  V5_Heelstick_Serum_NVR  V5_Heelstick_Serum_NVR   \n",
       "012_HFX_HW_RAW_IMC_A8_214A  V5_Heelstick_Serum_HVR  V5_Heelstick_Serum_HVR   \n",
       "021_HFX_HW_RAW_IMC_C3_227A  V5_Heelstick_Serum_HVR  V5_Heelstick_Serum_HVR   \n",
       "024_HFX_HW_RAW_IMC_C6_124A   V5_Heelstick_Serum_NA   V5_Heelstick_Serum_NA   \n",
       "\n",
       "                           Responder Status NVRHVR Group_ID_Responder2Groups  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A                     NVR    V5_Heelstick_Serum_NVR   \n",
       "006_HFX_HW_RAW_IMC_A2_209A                     NVR    V5_Heelstick_Serum_NVR   \n",
       "012_HFX_HW_RAW_IMC_A8_214A                     HVR    V5_Heelstick_Serum_HVR   \n",
       "021_HFX_HW_RAW_IMC_C3_227A                     HVR    V5_Heelstick_Serum_HVR   \n",
       "024_HFX_HW_RAW_IMC_C6_124A                     NaN     V5_Heelstick_Serum_NA   \n",
       "\n",
       "                            BabyN  \n",
       "005_HFX_HW_RAW_IMC_A1_106A    106  \n",
       "006_HFX_HW_RAW_IMC_A2_209A    209  \n",
       "012_HFX_HW_RAW_IMC_A8_214A    214  \n",
       "021_HFX_HW_RAW_IMC_C3_227A    226  \n",
       "024_HFX_HW_RAW_IMC_C6_124A    124  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "meta_base = raw_proteomics.loc[[i for i in raw_proteomics.index if i.startswith('LFQ intensity')],\n",
    "                               raw_proteomics.columns[:list(raw_proteomics.columns).index('Subject ID')+1]]\n",
    "meta_base.index = [i.split()[-1] for i in meta_base.index]\n",
    "column_name_replacements = {'Subject ID': 'BabyN', 'VisitID': 'VisitCode'}\n",
    "meta_base.columns = [i if i not in column_name_replacements else column_name_replacements[i] for i in meta_base.columns]\n",
    "meta_base['BabyN'] = [int(i) for i in meta_base['BabyN']]\n",
    "meta_base.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2e2d9d0a-aab5-4cb6-b2d8-c865a2d4dc21",
   "metadata": {},
   "source": [
    "Bring in titer data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "387aabaf-39db-4a87-8fd7-9e129a748afc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PT</th>\n",
       "      <th>Dip</th>\n",
       "      <th>FHA</th>\n",
       "      <th>PRN</th>\n",
       "      <th>TET</th>\n",
       "      <th>PRP (Hib)</th>\n",
       "      <th>PCV ST1</th>\n",
       "      <th>PCV ST3</th>\n",
       "      <th>PCV ST4</th>\n",
       "      <th>PCV ST5</th>\n",
       "      <th>...</th>\n",
       "      <th>median_mmNorm</th>\n",
       "      <th>median_mmNorm_DTAPHib</th>\n",
       "      <th>median_mmNorm_PCV</th>\n",
       "      <th>PT_protected</th>\n",
       "      <th>Dip_protected</th>\n",
       "      <th>FHA_protected</th>\n",
       "      <th>PRN_protected</th>\n",
       "      <th>TET_protected</th>\n",
       "      <th>PRP (Hib)_protected</th>\n",
       "      <th>VR_group</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>106</th>\n",
       "      <td>2.5</td>\n",
       "      <td>0.21</td>\n",
       "      <td>11.0</td>\n",
       "      <td>2.5</td>\n",
       "      <td>0.30</td>\n",
       "      <td>0.39</td>\n",
       "      <td>141.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>56.0</td>\n",
       "      <td>139.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.061955</td>\n",
       "      <td>0.052874</td>\n",
       "      <td>0.061955</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>NVR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107</th>\n",
       "      <td>2.5</td>\n",
       "      <td>0.44</td>\n",
       "      <td>3.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.52</td>\n",
       "      <td>1.60</td>\n",
       "      <td>2430.0</td>\n",
       "      <td>415.0</td>\n",
       "      <td>194.0</td>\n",
       "      <td>332.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.449483</td>\n",
       "      <td>0.114018</td>\n",
       "      <td>0.958142</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>NVR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>108</th>\n",
       "      <td>2.5</td>\n",
       "      <td>0.05</td>\n",
       "      <td>1.5</td>\n",
       "      <td>2.5</td>\n",
       "      <td>0.05</td>\n",
       "      <td>0.27</td>\n",
       "      <td>21.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>41.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.003102</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>LVR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>109</th>\n",
       "      <td>27.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>63.0</td>\n",
       "      <td>1.35</td>\n",
       "      <td>7.02</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.700925</td>\n",
       "      <td>0.763049</td>\n",
       "      <td>0.486810</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>NVR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>110</th>\n",
       "      <td>14.0</td>\n",
       "      <td>0.24</td>\n",
       "      <td>15.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>2.45</td>\n",
       "      <td>NaN</td>\n",
       "      <td>301.0</td>\n",
       "      <td>63.0</td>\n",
       "      <td>400.0</td>\n",
       "      <td>289.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.266219</td>\n",
       "      <td>0.284211</td>\n",
       "      <td>0.245121</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>NVR</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 48 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       PT   Dip   FHA   PRN   TET  PRP (Hib)  PCV ST1  PCV ST3  PCV ST4  \\\n",
       "106   2.5  0.21  11.0   2.5  0.30       0.39    141.0     35.0     56.0   \n",
       "107   2.5  0.44   3.0   9.0  0.52       1.60   2430.0    415.0    194.0   \n",
       "108   2.5  0.05   1.5   2.5  0.05       0.27     21.0      3.0     24.0   \n",
       "109  27.0   NaN   NaN  63.0  1.35       7.02      NaN      NaN      NaN   \n",
       "110  14.0  0.24  15.0  20.0  2.45        NaN    301.0     63.0    400.0   \n",
       "\n",
       "     PCV ST5  ...  median_mmNorm  median_mmNorm_DTAPHib  median_mmNorm_PCV  \\\n",
       "106    139.0  ...       0.061955               0.052874           0.061955   \n",
       "107    332.0  ...       0.449483               0.114018           0.958142   \n",
       "108     41.0  ...       0.000000               0.000000           0.003102   \n",
       "109      NaN  ...       0.700925               0.763049           0.486810   \n",
       "110    289.0  ...       0.266219               0.284211           0.245121   \n",
       "\n",
       "     PT_protected  Dip_protected  FHA_protected  PRN_protected  TET_protected  \\\n",
       "106         False           True           True          False           True   \n",
       "107         False           True          False           True           True   \n",
       "108         False          False          False          False          False   \n",
       "109          True          False          False           True           True   \n",
       "110          True           True           True           True           True   \n",
       "\n",
       "     PRP (Hib)_protected  VR_group  \n",
       "106                 True       NVR  \n",
       "107                 True       NVR  \n",
       "108                 True       LVR  \n",
       "109                 True       NVR  \n",
       "110                False       NVR  \n",
       "\n",
       "[5 rows x 48 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "titer_data = pd.read_csv('../../data/vaccine_response/vaccine_response_y1.tsv', sep='\\t', index_col=0)\n",
    "titer_data.index = [int(i.split('Baby')[-1]) for i in titer_data.index]\n",
    "titer_data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3c433fe5-9012-41c8-a902-4ba27d3da7e8",
   "metadata": {},
   "source": [
    "Split titer data to be per sample that we have in the proteomics metadata."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "8d366c59-6e0e-4226-a92b-7806106031ff",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PT</th>\n",
       "      <th>Dip</th>\n",
       "      <th>FHA</th>\n",
       "      <th>PRN</th>\n",
       "      <th>TET</th>\n",
       "      <th>PRP (Hib)</th>\n",
       "      <th>PCV ST1</th>\n",
       "      <th>PCV ST3</th>\n",
       "      <th>PCV ST4</th>\n",
       "      <th>PCV ST5</th>\n",
       "      <th>...</th>\n",
       "      <th>median_mmNorm</th>\n",
       "      <th>median_mmNorm_DTAPHib</th>\n",
       "      <th>median_mmNorm_PCV</th>\n",
       "      <th>PT_protected</th>\n",
       "      <th>Dip_protected</th>\n",
       "      <th>FHA_protected</th>\n",
       "      <th>PRN_protected</th>\n",
       "      <th>TET_protected</th>\n",
       "      <th>PRP (Hib)_protected</th>\n",
       "      <th>VR_group</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>005_HFX_HW_RAW_IMC_A1_106A</th>\n",
       "      <td>2.5</td>\n",
       "      <td>0.21</td>\n",
       "      <td>11.0</td>\n",
       "      <td>2.5</td>\n",
       "      <td>0.3</td>\n",
       "      <td>0.39</td>\n",
       "      <td>141.0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>56.0</td>\n",
       "      <td>139.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.061955</td>\n",
       "      <td>0.052874</td>\n",
       "      <td>0.061955</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>NVR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>006_HFX_HW_RAW_IMC_A2_209A</th>\n",
       "      <td>5.0</td>\n",
       "      <td>0.29</td>\n",
       "      <td>7.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.27</td>\n",
       "      <td>1.17</td>\n",
       "      <td>154.0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>89.0</td>\n",
       "      <td>679.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.102041</td>\n",
       "      <td>0.105087</td>\n",
       "      <td>0.074972</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>NVR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>012_HFX_HW_RAW_IMC_A8_214A</th>\n",
       "      <td>12.0</td>\n",
       "      <td>0.28</td>\n",
       "      <td>29.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.66</td>\n",
       "      <td>4.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.310899</td>\n",
       "      <td>0.310899</td>\n",
       "      <td>NaN</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>NVR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>021_HFX_HW_RAW_IMC_C3_227A</th>\n",
       "      <td>6.0</td>\n",
       "      <td>0.25</td>\n",
       "      <td>11.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.82</td>\n",
       "      <td>2.25</td>\n",
       "      <td>381.0</td>\n",
       "      <td>94.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>322.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.191269</td>\n",
       "      <td>0.205742</td>\n",
       "      <td>0.139576</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>NVR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>026_HFX_HW_RAW_IMC_C8_208A</th>\n",
       "      <td>6.0</td>\n",
       "      <td>0.32</td>\n",
       "      <td>9.0</td>\n",
       "      <td>2.5</td>\n",
       "      <td>0.14</td>\n",
       "      <td>0.91</td>\n",
       "      <td>297.0</td>\n",
       "      <td>193.0</td>\n",
       "      <td>181.0</td>\n",
       "      <td>278.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.125416</td>\n",
       "      <td>0.113055</td>\n",
       "      <td>0.125416</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>NVR</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 48 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                              PT   Dip   FHA   PRN   TET PRP (Hib) PCV ST1  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A   2.5  0.21  11.0   2.5   0.3      0.39   141.0   \n",
       "006_HFX_HW_RAW_IMC_A2_209A   5.0  0.29   7.0   5.0  0.27      1.17   154.0   \n",
       "012_HFX_HW_RAW_IMC_A8_214A  12.0  0.28  29.0  19.0  0.66       4.7     NaN   \n",
       "021_HFX_HW_RAW_IMC_C3_227A   6.0  0.25  11.0  20.0  0.82      2.25   381.0   \n",
       "026_HFX_HW_RAW_IMC_C8_208A   6.0  0.32   9.0   2.5  0.14      0.91   297.0   \n",
       "\n",
       "                           PCV ST3 PCV ST4 PCV ST5  ... median_mmNorm  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A    35.0    56.0   139.0  ...      0.061955   \n",
       "006_HFX_HW_RAW_IMC_A2_209A    40.0    89.0   679.0  ...      0.102041   \n",
       "012_HFX_HW_RAW_IMC_A8_214A     NaN     NaN     NaN  ...      0.310899   \n",
       "021_HFX_HW_RAW_IMC_C3_227A    94.0    90.0   322.0  ...      0.191269   \n",
       "026_HFX_HW_RAW_IMC_C8_208A   193.0   181.0   278.0  ...      0.125416   \n",
       "\n",
       "                           median_mmNorm_DTAPHib median_mmNorm_PCV  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A              0.052874          0.061955   \n",
       "006_HFX_HW_RAW_IMC_A2_209A              0.105087          0.074972   \n",
       "012_HFX_HW_RAW_IMC_A8_214A              0.310899               NaN   \n",
       "021_HFX_HW_RAW_IMC_C3_227A              0.205742          0.139576   \n",
       "026_HFX_HW_RAW_IMC_C8_208A              0.113055          0.125416   \n",
       "\n",
       "                           PT_protected Dip_protected FHA_protected  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A        False          True          True   \n",
       "006_HFX_HW_RAW_IMC_A2_209A        False          True         False   \n",
       "012_HFX_HW_RAW_IMC_A8_214A         True          True          True   \n",
       "021_HFX_HW_RAW_IMC_C3_227A        False          True          True   \n",
       "026_HFX_HW_RAW_IMC_C8_208A        False          True          True   \n",
       "\n",
       "                           PRN_protected TET_protected PRP (Hib)_protected  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A         False          True                True   \n",
       "006_HFX_HW_RAW_IMC_A2_209A         False          True                True   \n",
       "012_HFX_HW_RAW_IMC_A8_214A          True          True                True   \n",
       "021_HFX_HW_RAW_IMC_C3_227A          True          True                True   \n",
       "026_HFX_HW_RAW_IMC_C8_208A         False          True                True   \n",
       "\n",
       "                           VR_group  \n",
       "005_HFX_HW_RAW_IMC_A1_106A      NVR  \n",
       "006_HFX_HW_RAW_IMC_A2_209A      NVR  \n",
       "012_HFX_HW_RAW_IMC_A8_214A      NVR  \n",
       "021_HFX_HW_RAW_IMC_C3_227A      NVR  \n",
       "026_HFX_HW_RAW_IMC_C8_208A      NVR  \n",
       "\n",
       "[5 rows x 48 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "per_sample_titer_data = pd.DataFrame({sample: titer_data.loc[i] for sample, i in meta_base['BabyN'].iteritems() if i in titer_data.index}).transpose()\n",
    "per_sample_titer_data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a0c67278-4a14-4be4-bd30-ce3ada7712e1",
   "metadata": {},
   "source": [
    "Merge proteomics metadata and titer data. Remove samples without a VR group."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "f6f8040f-3273-408e-893d-bc1a6e3e7dc5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>BioID</th>\n",
       "      <th>Well</th>\n",
       "      <th>Run Number</th>\n",
       "      <th>Hospital</th>\n",
       "      <th>VisitCode</th>\n",
       "      <th>Age</th>\n",
       "      <th>Draw</th>\n",
       "      <th>Matrix</th>\n",
       "      <th>Draw_Matrix</th>\n",
       "      <th>Draw_Matrix_Time</th>\n",
       "      <th>...</th>\n",
       "      <th>median_mmNorm</th>\n",
       "      <th>median_mmNorm_DTAPHib</th>\n",
       "      <th>median_mmNorm_PCV</th>\n",
       "      <th>PT_protected</th>\n",
       "      <th>Dip_protected</th>\n",
       "      <th>FHA_protected</th>\n",
       "      <th>PRN_protected</th>\n",
       "      <th>TET_protected</th>\n",
       "      <th>PRP (Hib)_protected</th>\n",
       "      <th>VR_group</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>005_HFX_HW_RAW_IMC_A1_106A</th>\n",
       "      <td>106A</td>\n",
       "      <td>A1</td>\n",
       "      <td>5</td>\n",
       "      <td>Well Check 02m</td>\n",
       "      <td>V5</td>\n",
       "      <td>63</td>\n",
       "      <td>Heelstick</td>\n",
       "      <td>Serum</td>\n",
       "      <td>Heelstick_Serum</td>\n",
       "      <td>V5_Heelstick_Serum</td>\n",
       "      <td>...</td>\n",
       "      <td>0.061955</td>\n",
       "      <td>0.052874</td>\n",
       "      <td>0.061955</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>NVR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>006_HFX_HW_RAW_IMC_A2_209A</th>\n",
       "      <td>209A</td>\n",
       "      <td>A2</td>\n",
       "      <td>6</td>\n",
       "      <td>Well Check 02m</td>\n",
       "      <td>V5</td>\n",
       "      <td>55</td>\n",
       "      <td>Heelstick</td>\n",
       "      <td>Serum</td>\n",
       "      <td>Heelstick_Serum</td>\n",
       "      <td>V5_Heelstick_Serum</td>\n",
       "      <td>...</td>\n",
       "      <td>0.102041</td>\n",
       "      <td>0.105087</td>\n",
       "      <td>0.074972</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>NVR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>012_HFX_HW_RAW_IMC_A8_214A</th>\n",
       "      <td>214A</td>\n",
       "      <td>A8</td>\n",
       "      <td>12</td>\n",
       "      <td>Well Check 02m</td>\n",
       "      <td>V5</td>\n",
       "      <td>89</td>\n",
       "      <td>Heelstick</td>\n",
       "      <td>Serum</td>\n",
       "      <td>Heelstick_Serum</td>\n",
       "      <td>V5_Heelstick_Serum</td>\n",
       "      <td>...</td>\n",
       "      <td>0.310899</td>\n",
       "      <td>0.310899</td>\n",
       "      <td>NaN</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>NVR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>021_HFX_HW_RAW_IMC_C3_227A</th>\n",
       "      <td>227A</td>\n",
       "      <td>C3</td>\n",
       "      <td>21</td>\n",
       "      <td>Well Check 02m</td>\n",
       "      <td>V5</td>\n",
       "      <td>57</td>\n",
       "      <td>Heelstick</td>\n",
       "      <td>Serum</td>\n",
       "      <td>Heelstick_Serum</td>\n",
       "      <td>V5_Heelstick_Serum</td>\n",
       "      <td>...</td>\n",
       "      <td>0.191269</td>\n",
       "      <td>0.205742</td>\n",
       "      <td>0.139576</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>NVR</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>026_HFX_HW_RAW_IMC_C8_208A</th>\n",
       "      <td>208A</td>\n",
       "      <td>C8</td>\n",
       "      <td>26</td>\n",
       "      <td>Well Check 02m</td>\n",
       "      <td>V5</td>\n",
       "      <td>63</td>\n",
       "      <td>Heelstick</td>\n",
       "      <td>Serum</td>\n",
       "      <td>Heelstick_Serum</td>\n",
       "      <td>V5_Heelstick_Serum</td>\n",
       "      <td>...</td>\n",
       "      <td>0.125416</td>\n",
       "      <td>0.113055</td>\n",
       "      <td>0.125416</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>NVR</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 65 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                           BioID Well Run Number        Hospital VisitCode  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A  106A   A1          5  Well Check 02m        V5   \n",
       "006_HFX_HW_RAW_IMC_A2_209A  209A   A2          6  Well Check 02m        V5   \n",
       "012_HFX_HW_RAW_IMC_A8_214A  214A   A8         12  Well Check 02m        V5   \n",
       "021_HFX_HW_RAW_IMC_C3_227A  227A   C3         21  Well Check 02m        V5   \n",
       "026_HFX_HW_RAW_IMC_C8_208A  208A   C8         26  Well Check 02m        V5   \n",
       "\n",
       "                           Age       Draw Matrix      Draw_Matrix  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A  63  Heelstick  Serum  Heelstick_Serum   \n",
       "006_HFX_HW_RAW_IMC_A2_209A  55  Heelstick  Serum  Heelstick_Serum   \n",
       "012_HFX_HW_RAW_IMC_A8_214A  89  Heelstick  Serum  Heelstick_Serum   \n",
       "021_HFX_HW_RAW_IMC_C3_227A  57  Heelstick  Serum  Heelstick_Serum   \n",
       "026_HFX_HW_RAW_IMC_C8_208A  63  Heelstick  Serum  Heelstick_Serum   \n",
       "\n",
       "                              Draw_Matrix_Time  ... median_mmNorm  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A  V5_Heelstick_Serum  ...      0.061955   \n",
       "006_HFX_HW_RAW_IMC_A2_209A  V5_Heelstick_Serum  ...      0.102041   \n",
       "012_HFX_HW_RAW_IMC_A8_214A  V5_Heelstick_Serum  ...      0.310899   \n",
       "021_HFX_HW_RAW_IMC_C3_227A  V5_Heelstick_Serum  ...      0.191269   \n",
       "026_HFX_HW_RAW_IMC_C8_208A  V5_Heelstick_Serum  ...      0.125416   \n",
       "\n",
       "                           median_mmNorm_DTAPHib median_mmNorm_PCV  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A              0.052874          0.061955   \n",
       "006_HFX_HW_RAW_IMC_A2_209A              0.105087          0.074972   \n",
       "012_HFX_HW_RAW_IMC_A8_214A              0.310899               NaN   \n",
       "021_HFX_HW_RAW_IMC_C3_227A              0.205742          0.139576   \n",
       "026_HFX_HW_RAW_IMC_C8_208A              0.113055          0.125416   \n",
       "\n",
       "                           PT_protected Dip_protected FHA_protected  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A        False          True          True   \n",
       "006_HFX_HW_RAW_IMC_A2_209A        False          True         False   \n",
       "012_HFX_HW_RAW_IMC_A8_214A         True          True          True   \n",
       "021_HFX_HW_RAW_IMC_C3_227A        False          True          True   \n",
       "026_HFX_HW_RAW_IMC_C8_208A        False          True          True   \n",
       "\n",
       "                            PRN_protected TET_protected PRP (Hib)_protected  \\\n",
       "005_HFX_HW_RAW_IMC_A1_106A          False          True                True   \n",
       "006_HFX_HW_RAW_IMC_A2_209A          False          True                True   \n",
       "012_HFX_HW_RAW_IMC_A8_214A           True          True                True   \n",
       "021_HFX_HW_RAW_IMC_C3_227A           True          True                True   \n",
       "026_HFX_HW_RAW_IMC_C8_208A          False          True                True   \n",
       "\n",
       "                           VR_group  \n",
       "005_HFX_HW_RAW_IMC_A1_106A      NVR  \n",
       "006_HFX_HW_RAW_IMC_A2_209A      NVR  \n",
       "012_HFX_HW_RAW_IMC_A8_214A      NVR  \n",
       "021_HFX_HW_RAW_IMC_C3_227A      NVR  \n",
       "026_HFX_HW_RAW_IMC_C8_208A      NVR  \n",
       "\n",
       "[5 rows x 65 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "meta = pd.concat([meta_base, per_sample_titer_data], axis=1)\n",
    "meta = meta.loc[~pd.isna(meta['VR_group'])]\n",
    "meta.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "639d7691-4823-4d09-b659-14128b41c750",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(47, 65)\n"
     ]
    }
   ],
   "source": [
    "in_both = list(set(meta.index) & set(proteomics.index))\n",
    "meta_matched = meta.loc[in_both]\n",
    "print(meta_matched.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "5217001f-d927-4245-ace0-36f9879b5226",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "V5    45\n",
       "V6     2\n",
       "Name: VisitCode, dtype: int64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "meta_matched['VisitCode'].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "27938a15-31b3-4d92-bf41-44b6fe6c9135",
   "metadata": {},
   "source": [
    "47 samples are in the metadata and proteomics data. Of those two are V6. We will filter those out."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "834373d2-6bf7-402e-b7c0-2f28b62f6c2e",
   "metadata": {},
   "outputs": [],
   "source": [
    "meta_v5 = meta_matched.query(\"VisitCode == 'V5'\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "46190c13-2458-4d76-8fb9-25d077252677",
   "metadata": {},
   "source": [
    "## Do correlation with un-normalized data\n",
    "\n",
    "We will do our correlations between proteomics and median titer. I used Spearman correlation and Benjamini-Hochberg for FDR correction per set of tests."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "fd9e7a01-b3d6-4ffc-862b-fe6022b05af4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>protein</th>\n",
       "      <th>LVR_mean</th>\n",
       "      <th>NVR_mean</th>\n",
       "      <th>statistic</th>\n",
       "      <th>p_value</th>\n",
       "      <th>p_adj</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>111</th>\n",
       "      <td>sp|P02750|A2GL_HUMAN</td>\n",
       "      <td>23.149189</td>\n",
       "      <td>22.660771</td>\n",
       "      <td>247.0</td>\n",
       "      <td>0.002204</td>\n",
       "      <td>0.492957</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98</th>\n",
       "      <td>sp|P02652|APOA2_HUMAN</td>\n",
       "      <td>27.290325</td>\n",
       "      <td>26.795589</td>\n",
       "      <td>242.0</td>\n",
       "      <td>0.003944</td>\n",
       "      <td>0.492957</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>142</th>\n",
       "      <td>sp|P05156|CFAI_HUMAN</td>\n",
       "      <td>23.295402</td>\n",
       "      <td>23.556167</td>\n",
       "      <td>58.0</td>\n",
       "      <td>0.006083</td>\n",
       "      <td>0.506935</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>CON__P00761</td>\n",
       "      <td>29.807311</td>\n",
       "      <td>29.472792</td>\n",
       "      <td>231.0</td>\n",
       "      <td>0.012217</td>\n",
       "      <td>0.610837</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>164</th>\n",
       "      <td>sp|P08697|A2AP_HUMAN</td>\n",
       "      <td>25.159493</td>\n",
       "      <td>25.013711</td>\n",
       "      <td>231.0</td>\n",
       "      <td>0.012217</td>\n",
       "      <td>0.610837</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   protein   LVR_mean   NVR_mean  statistic   p_value  \\\n",
       "111   sp|P02750|A2GL_HUMAN  23.149189  22.660771      247.0  0.002204   \n",
       "98   sp|P02652|APOA2_HUMAN  27.290325  26.795589      242.0  0.003944   \n",
       "142   sp|P05156|CFAI_HUMAN  23.295402  23.556167       58.0  0.006083   \n",
       "0              CON__P00761  29.807311  29.472792      231.0  0.012217   \n",
       "164   sp|P08697|A2AP_HUMAN  25.159493  25.013711      231.0  0.012217   \n",
       "\n",
       "        p_adj  \n",
       "111  0.492957  \n",
       "98   0.492957  \n",
       "142  0.506935  \n",
       "0    0.610837  \n",
       "164  0.610837  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "proteomics_v5 = proteomics.loc[meta_v5.index]\n",
    "proteomics_stats_v5_rows = list()\n",
    "for protein, row in proteomics_v5.transpose().iterrows():\n",
    "    lvr_abunds = row[meta_v5.query('VR_group == \"LVR\"').index]\n",
    "    nvr_abunds = row[meta_v5.query('VR_group == \"NVR\"').index]\n",
    "    # check for not all zeros\n",
    "    # lvr_gt_20 = (lvr_abunds != 0).sum()/len(lvr_abunds) > .2\n",
    "    # nvr_gt_20 = (nvr_abunds != 0).sum()/len(nvr_abunds) > .2\n",
    "    lvr_gt_20 = lvr_abunds.sum()/len(lvr_abunds) > 10\n",
    "    nvr_gt_20 = nvr_abunds.sum()/len(nvr_abunds) > 10\n",
    "    if lvr_gt_20 or nvr_gt_20:\n",
    "        stat, p_value = mannwhitneyu(lvr_abunds, nvr_abunds)\n",
    "        proteomics_stats_v5_rows.append([protein, lvr_abunds.mean(), nvr_abunds.mean(), stat, p_value])\n",
    "proteomics_stats_v5 = pd.DataFrame(proteomics_stats_v5_rows, columns=['protein', 'LVR_mean', 'NVR_mean', 'statistic', 'p_value']).sort_values('p_value')\n",
    "proteomics_stats_v5['p_adj'] = p_adjust(proteomics_stats_v5['p_value'])\n",
    "proteomics_stats_v5.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "800b284f-5d99-4587-afab-38ab0e3d663c",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
